
##############################################################################
############ Replication Code for RCW 2012  ##################################
##############################################################################

#########################################
############ SETTING UP  ################
#########################################
library(foreign)
library(Zelig)
library(MatchIt)
library(xtable)
library(survey)
library(sandwich)
library(lmtest)
library(systemfit)
library(cem)
library(mvtnorm)
library(micEcon)
set.seed(60607)

####Function to cluster standard errors (we'll need this later)
cl   <- function(dat,fm, cluster){
             attach(dat, warn.conflicts = F)
             M <- length(unique(cluster))   
             N <- length(cluster)  	        
             K <- fm$rank		             
             dfc <- (M/(M-1))*((N-1)/(N-K))  
             uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum))
             vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
             detach(dat)
             return(coeftest(fm, vcovCL))
             }
             
             
######################################################################################
############  Read in RCW (our data) and CKK (the authors' data)  ####################
######################################################################################

#Download the Czaika and Kis-Katos 2009 data from Sage Publication - doi: 10.1177/0022343309102659 

#set this working directory to where you have put the podes_proc.dta (CKK) and revisiting3.dta (RCW) files
setwd()
RCW <- read.dta(file="RCW.dta")

#set this working directory to where you have put the podes_proc.dta (CKK) and revisiting3.dta (RCW) files
setwd()
CKK <- read.dta(file="podes_proc.dta")


######################################################################################
############  Replication of OLS Analysis using CKK data (Table 1)  ##############
######################################################################################

#i.e. What the authors (CKK) did in their original paper (Table III in Czaika and Kis Katos (2009))

###########  OLS Regression of conflict on dpop (CKK) ###########

ckk_c_conflict_j <- lm(dpop ~ conflict + urban + altitude + remote + poor00 + couples00 + station03 + police00 + share_Jawa + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK)

#####Add clustered standard errors at subdistrict (kabkec) level
ckk_c_conflict_j_cl <- cl(CKK, ckk_c_conflict_j, CKK$kabkec)

##Update the SEs of the original lm object
ckk_c_conflict_j$se <- ckk_c_conflict_j_cl[,2]

###########  OLS Regression of high_cshare (conflict cluster) on dpop (CKK) ###########

ckk_high_cshare_j <- lm(dpop ~ high_cshare + urban + altitude + remote + poor00 + couples00 + station03 + police00 + share_Jawa + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK)

#####Add clustered standard errors at subdistrict (kabkec) level
ckk_high_cshare_j_cl <- cl(CKK, ckk_high_cshare_j, CKK$kabkec)

##Update the SEs of the original lm object
ckk_high_cshare_j$se <- ckk_high_cshare_j_cl[,2]

###########  Using this one can generate Table XXXXXX ###########


#########################################################################################
#######  Replication of OLS Analysis using both CKK and RCW data (Table 2)  #########
#########################################################################################

#i.e. Compare the analysis above to: 
		# 1) OLS analysis without share_jawa (still using CKK)
		# 2) OLS analysis without share_jawa (using RCW)
		
		

###########  OLS Regression of conflict on dpop (CKK) without share_jawa ###########

ckk_c_conflict <- lm(dpop ~ conflict + urban + altitude + remote + poor00 + couples00 + station03 + police00 + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK)

#####Add clustered standard errors
ckk_c_conflict_cl <- cl(CKK, ckk_c_conflict, CKK$kabkec)

##Update the SEs of the original lm object
ckk_c_conflict$se <- ckk_c_conflict_cl[,2]


###########  OLS Regression of high_cshare on dpop (CKK) without share_jawa ###########

ckk_high_cshare <- lm(dpop ~ high_cshare + urban + altitude + remote + poor00 + couples00 + station03 + police00 + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK)

#####Add clustered standard errors
ckk_high_cshare_cl <- cl(CKK, ckk_high_cshare, CKK$kabkec)

##Update the SEs of the original lm object
ckk_high_cshare$se <- ckk_high_cshare_cl[,2]

###########  OLS Regression of conflict on dpop (RCW) without share_jawa ###########

subset_2003 <- subset(RCW, year==2003)

c_conflict_03 <- lm(ck_dpop ~ c_conflict + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 + ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003)

#####Add clustered standard errors
c_conflict_03_cl <- cl(subset_2003, c_conflict_03, subset_2003$sub_district)

##Update the SEs of the original lm object
c_conflict_03$se <- c_conflict_03_cl[,2]
             
#############  OLS Regression of high_cshare on dpop (RCW) without share_jawa ###########
           
subset_2003 <- subset(RCW, year==2003)

high_cshare_03 <- lm(ck_dpop ~ high_cshare + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003)
 
##Add clustered standard errors
high_cshare_03_cl <- cl(subset_2003, high_cshare_03, subset_2003$sub_district)             

##Update the SEs of the original lm object
high_cshare_03$se <- high_cshare_03_cl[,2]


  ###########  Using this one can generate Table 2 ###########



################################################################################
#######  Summary Statistics of CKK and RCW  -- Table 3 #####################
################################################################################

CKK_means <- c(mean(CKK$dpop), mean(CKK$conflict), mean(CKK$high_cshare), mean(CKK$urban), mean(CKK$altitude), mean(CKK$remote), mean(CKK$poor00), mean(CKK$couples00), mean(CKK$station03), mean(CKK$police00), mean(CKK$share_Jawa), mean(CKK$kab_Tengah), mean(CKK$kab_Barat), mean(CKK$kab_NaganRaya), mean(CKK$kab_BaratDaya), mean(CKK$kab_Selatan), mean(CKK$kab_Utara), mean(CKK$kab_Timur), mean(CKK$ kab_Tamiang), mean(CKK$kab_Langsa), mean(CKK$kab_Lhokseumawe), mean(CKK$kab_BandaAceh), mean(CKK$pop00))

CKK_sds <- c(sd(CKK$dpop), sd(CKK$conflict), sd(CKK$high_cshare), sd(CKK$urban), sd(CKK$altitude), sd(CKK$remote), sd(CKK$poor00), sd(CKK$couples00), sd(CKK$station03), sd(CKK$police00), sd(CKK$share_Jawa), sd(CKK$kab_Tengah), sd(CKK$kab_Barat), sd(CKK$kab_NaganRaya), sd(CKK$kab_BaratDaya), sd(CKK$kab_Selatan), sd(CKK$kab_Utara), sd(CKK$kab_Timur), sd(CKK$ kab_Tamiang), sd(CKK$kab_Langsa), sd(CKK$kab_Lhokseumawe), sd(CKK$kab_BandaAceh), sd(CKK$pop00))

RCW_means <- c(mean(subset_2003$ck_dpop), mean(subset_2003$c_conflict), mean(subset_2003$high_cshare), mean(subset_2003$urban0308), mean(subset_2003$ck_altitude), mean(subset_2003$ck_remote), mean(subset_2003$povrate0005), mean(subset_2003$ck_couples00), mean(subset_2003$ck_station03), mean(subset_2003$c_police0005), mean(subset_2003$ck_kab_Tengah), mean(subset_2003$ck_kab_Barat), mean(subset_2003$ck_kab_NaganRaya), mean(subset_2003$ck_kab_BaratDaya), mean(subset_2003$ck_kab_Selatan), mean(subset_2003$ck_kab_Utara), mean(subset_2003$ck_kab_Timur), mean(subset_2003$ck_kab_Tamiang), mean(subset_2003$ck_kab_Langsa), mean(subset_2003$ck_kab_Lhokseumawe), mean(subset_2003$ck_kab_BandaAceh), mean(subset_2003$pop_lag))

RCW_sds <- c(sd(subset_2003$ck_dpop), sd(subset_2003$c_conflict), sd(subset_2003$high_cshare), sd(subset_2003$urban0308), sd(subset_2003$ck_altitude), sd(subset_2003$ck_remote), sd(subset_2003$povrate0005), sd(subset_2003$ck_couples00), sd(subset_2003$ck_station03), sd(subset_2003$c_police0005), sd(subset_2003$ck_kab_Tengah), sd(subset_2003$ck_kab_Barat), sd(subset_2003$ck_kab_NaganRaya), sd(subset_2003$ck_kab_BaratDaya), sd(subset_2003$ck_kab_Selatan), sd(subset_2003$ck_kab_Utara), sd(subset_2003$ck_kab_Timur), sd(subset_2003$ck_kab_Tamiang), sd(subset_2003$ck_kab_Langsa), sd(subset_2003$ck_kab_Lhokseumawe), sd(subset_2003$ck_kab_BandaAceh), sd(subset_2003$pop_lag))

###### Using this one can generate the Summary Statistics on Table 3 ########


#############################################################################################
####  Estimates of Effect of Conflict on Population Change (OLS) Table 4 ###############
#############################################################################################

#column 1 CKK (from above)
ckk_2000_2003_c <- ckk_c_conflict_cl["conflict",]
ckk_2000_2003_cc <- ckk_high_cshare_cl["high_cshare",]             

#column 2 RCW (from above)
rcw_2000_2003_c <- c_conflict_03_cl["c_conflict",]
rcw_2000_2003_cc <- high_cshare_03_cl["high_cshare",]

#column 3 RCW panel:

#############  OLS Regression of conflict on dpop (RCW) with 2008 dummy ###########
             
c_conflict_panel <- lm(ck_dpop ~ c_conflict + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 + ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu + yr2008 , data=RCW)
 
##Add clustered standard errors
c_conflict_panel_cl <- cl(RCW, c_conflict_panel, RCW$sub_district)             

#Update the SEs of the original lm object
c_conflict_panel$se <- c_conflict_panel_cl[,2]

#############  OLS Regression of conflict cluster on dpop (RCW) with 2008 dummy ###########

high_cshare_panel <- lm(ck_dpop ~ high_cshare + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu + yr2008 , data=RCW)
 
##Add clustered standard errors
high_cshare_panel_cl <- cl(RCW, high_cshare_panel, RCW$sub_district)     

#Update the SEs of the original lm object
high_cshare_panel$se <- high_cshare_panel_cl[,2]
             
##column 3 values
rcw_panel_c <- c_conflict_panel_cl["c_conflict",]             
rcw_panel_cc <- high_cshare_panel_cl["high_cshare",]

##################### Using this you can generate Table 4 ######################  


###################################################################################
################## NEW QOI: First Differences Table 5 ##########################
###################################################################################

###Column 1 -- CKK 200/2003

###Be sure to set.seed each time!

##This is the seed used to generate Table 4
set.seed(84109)

### First Differences with conflict 
set.seed(84109)
x.low <- setx(ckk_c_conflict, conflict=0)
x.high <- setx(ckk_c_conflict, conflict=1)
s.out <- sim(ckk_c_conflict, x.low, x1=x.high)
ckk_fd_c_data <- s.out$qi$fd #we will use this later for density plots
ci_90 <- summary(s.out, CI=90)$qi.stats$fd #to see if significant at 90%
ckk_fd_c <- summary(s.out)$qi.stats$fd #this goes in Table 4

### First Differences with conflict cluster
set.seed(84109)
x.low <- setx(ckk_high_cshare, high_cshare=0)
x.high <- setx(ckk_high_cshare, high_cshare=1)
s.out <- sim(ckk_high_cshare, x.low, x1=x.high)
ckk_fd_cc_data <- s.out$qi$fd
ci_90 <- summary(s.out, CI=90)$qi.stats$fd
ckk_fd_cc <- summary(s.out)$qi.stats$fd

#####Column 2 -- RCW 2000/2003

##### First Differences with conflict
set.seed(84109)
x.low <- setx(c_conflict_03, c_conflict=0)
x.high <- setx(c_conflict_03, c_conflict=1)
s.out <- sim(c_conflict_03, x.low, x1=x.high)
rcw_fd_c_data <- s.out$qi$fd
ci_90 <- summary(s.out, CI=90)$qi.stats$fd
rcw_fd_c <- summary(s.out)$qi.stats$fd

##### First Differences using conflict clusters
set.seed(84109)
x.low <- setx(high_cshare_03, high_cshare=0)
x.high <- setx(high_cshare_03, high_cshare=1)
s.out <- sim(high_cshare_03, x.low, x1=x.high)
ci_90 <- summary(s.out, CI=90)$qi.stats$fd
rcw_fd_cc_data <- s.out$qi$fd
rcw_fd_cc <- summary(s.out)$qi.stats$fd


####Column 3 -- RCW Panel

### First Differences using conflict
set.seed(84109)
x.low <- setx(c_conflict_panel, c_conflict=0)
x.high <- setx(c_conflict_panel, c_conflict=1)
s.out <- sim(c_conflict_panel, x.low, x1=x.high)
panel_fd_c_data <- s.out$qi$fd
ci_90 <- summary(s.out, CI=90)$qi.stats$fd
panel_fd_c <- summary(s.out)$qi.stats$fd

#### First Differences using conflict cluster
set.seed(84109)
x.low <- setx(high_cshare_panel, high_cshare=0)
x.high <- setx(high_cshare_panel, high_cshare=1)
s.out <- sim(high_cshare_panel, x.low, x1=x.high)
ci_90 <- summary(s.out, CI=90)$qi.stats$fd
panel_fd_cc_data <- s.out$qi$fd
panel_fd_cc <- summary(s.out)$qi.stats$fd


####### Using this you can generate Table 5 ###############



###################################################################################
################## Non-Parametric (matching) Table 6 ##########################
###################################################################################


################## Matching CKK conflict ##########################

#For non-binary vars, choose cutpoints:  25th, 50th, 75th quantile as cutpoints
popcuts <- quantile(CKK$pop00,c(0, 0.25,0.5, 0.75, 1))
couplescuts <- quantile(CKK$couples00, c(0, 0.25, 0.5, 0.75, 1))
poorcuts <- quantile(CKK$poor00, c(0, 0.25, 0.5, 0.75, 1))

#Run CEM to get weights
cem_out <- matchit(conflict ~ pop00 + couples00 + poor00 + altitude + urban, data=CKK, method="cem", cutpoints = list(pop00 = popcuts,  couples00 = couplescuts, poor00 = poorcuts))

##Run original regression with weights from cem_out
ckk_mat_c_conflict_03 <- lm(dpop ~ conflict + urban + altitude + remote + poor00 + couples00 + station03 + police00 + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK, weights=cem_out$w)

#Add clustered standard errors
ckk_mat_c_conflict_03_cl <- cl(CKK, ckk_mat_c_conflict_03, CKK$kabkec)

#This goes into table 6
ckk_mat_c <- ckk_mat_c_conflict_03_cl["conflict",]

################## Matching CKK conflict cluster ##########################

#keep cutpoints and data the same as above

#Run CEM to get weights
cem_out <- matchit(high_cshare ~ pop00 + couples00 + poor00 + altitude + urban, data=CKK, method="cem", cutpoints = list(pop00 = popcuts,  couples00 = couplescuts, poor00 = poorcuts))

#Run original regression with weights from cem_out
ckk_mat_high_cshare_03 <- lm(dpop ~ high_cshare + urban + altitude + remote + poor00 + couples00 + station03 + police00 + kab_Tengah + kab_Barat + kab_NaganRaya + kab_BaratDaya + kab_Selatan + kab_Utara + kab_Timur +  kab_Tamiang + kab_Langsa + kab_Lhokseumawe + kab_BandaAceh + pop00 + popsq00 + pop3_00 + pop4_00, data=CKK, weights=cem_out$w)

#Add clustered standard errors
ckk_mat_high_cshare_03_cl <- cl(CKK, ckk_mat_high_cshare_03, CKK$kabkec)

#This goes into table 6
ckk_mat_cc <- ckk_mat_high_cshare_03_cl["high_cshare",]

################## Matching RCW 2000/2003 conflict ##########################


#use the subset of the data from 2003
subset_2003 <- subset(RCW, year==2003)


#25th, 50th, 75th quantile as cutpoints for the non-binary variables
popcuts <- quantile(subset_2003$pop_lag,c(0, 0.25,0.5, 0.75, 1))
couplescuts <- quantile(subset_2003$ck_couples00, c(0, 0.25, 0.5, 0.75, 1))
poorcuts <- quantile(subset_2003$povrate0005, c(0, 0.25, 0.5, 0.75, 1))

#Use CEM to get weights
cem_out <- matchit(c_conflict ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308, data=subset_2003, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

#Run original regression with weights from cem_out
rcw_mat_c_conflict_03 <-  lm(ck_dpop ~ c_conflict + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 + 
 ck_kab_Tengah + ck_kab_Barat + ck_kab_BaratDaya + ck_kab_Selatan +  ck_kab_Utara + ck_kab_Timur + ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + ck_kab_NaganRaya + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003, weights=cem_out$w)

#Add clustered standard errors
rcw_mat_c_conflict_03_cl <- cl(subset_2003, rcw_mat_c_conflict_03, subset_2003$sub_district)

#Update original lm object
rcw_mat_c_conflict_03$se <- rcw_mat_c_conflict_03_cl[,2]

#This goes into table 6
rcw_mat_c <- rcw_mat_c_conflict_03_cl["c_conflict",]


################## Matching RCW 2000/2003 conflict cluster ##########################

#keep cutpoints the same as above

#Use CEM to get weights
cem_out <- matchit(high_cshare ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308, data=subset_2003, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

#Run original regression with weights from cem_out
rcw_mat_high_cshare_03 <-  lm(ck_dpop ~ high_cshare + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_BaratDaya + ck_kab_Selatan +  ck_kab_Utara + ck_kab_Timur + ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + ck_kab_NaganRaya + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003, weights=cem_out$w)

#Add clustered standard errors
rcw_mat_high_cshare_03_cl <- cl(subset_2003, rcw_mat_high_cshare_03, subset_2003$sub_district)

#Update original lm object
rcw_mat_high_cshare_03$se <- rcw_mat_high_cshare_03_cl[,2]

#This goes in Table 6
rcw_mat_cc <- rcw_mat_high_cshare_03_cl["high_cshare",]


################## Matching RCW Panel conflict ##########################

#Match on yr2008 in the panel dataset

#25th, 50th, 75th quantile as cutpoints
popcuts <- quantile(RCW$pop_lag,c(0, 0.25,0.5, 0.75, 1))
couplescuts <- quantile(RCW$ck_couples00, c(0, 0.25, 0.5, 0.75, 1))
poorcuts <- quantile(RCW$povrate0005, c(0, 0.25, 0.5, 0.75, 1))

#Use CEM to get weights
cem_out <- matchit(c_conflict ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308 + yr2008, data=RCW, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

#Run original regression with weights from cem_out
rcw_c_conflict_panel <-  lm(ck_dpop ~ c_conflict + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_BaratDaya + ck_kab_Selatan +  ck_kab_Utara + ck_kab_Timur + ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + ck_kab_NaganRaya + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu + yr2008, data=RCW, weights=cem_out$w)

#Add clustered standard errors
rcw_c_conflict_panel_cl <- cl(RCW, rcw_c_conflict_panel, RCW$sub_district)

#This goes in Table 6 
rcw_panel_mat_c <- rcw_c_conflict_panel_cl["c_conflict",]

################## Matching RCW Panel conflict cluster ##########################

#keep cutpoints the same as above

#Use CEM to get weights
cem_out <- matchit(high_cshare ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308 + yr2008, data=RCW, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

#Run original regression with weights from cem_out
rcw_high_cshare_panel <-  lm(ck_dpop ~ high_cshare + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_BaratDaya + ck_kab_Selatan +  ck_kab_Utara + ck_kab_Timur + ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + ck_kab_NaganRaya + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu + yr2008, data=RCW, weights=cem_out$w)

#Add clustered standard errors
rcw_high_cshare_panel_cl <- cl(RCW, rcw_high_cshare_panel, RCW$sub_district)

#This goes in Table 6
rcw_panel_mat_cc <- rcw_high_cshare_panel_cl["high_cshare",]


########### Using this data you can generate table 6 ########


###################################################################################
################## First Differences DENSITY PLOTS ################################
###################################################################################

########################## RCW 2000/2003 FDs -- Figure 1 ################

##### First Differences with conflict
##Calculated above
#rcw_fd_c_data 

##### First Differences using conflict cluster
##Calculated above
#rcw_fd_cc_data 


### First Differences with cshare 
#Do OLS analysis
subset_2003 <- subset(RCW, year==2003)

cshare_03 <- lm(ck_dpop ~ cshare + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 + ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003)
 
#Add clustered standard errors
cshare_03_cl <- cl(subset_2003, cshare_03, subset_2003$sub_district)

#Update origianl lm object
cshare_03$se <- cshare_03_cl[,2]

#use zelig for FDs, N.B. We use movement from 25th to 75th percentile because continuous
set.seed(84109)
x.low <- setx(cshare_03, cshare=quantile(subset_2003$cshare,0.25))
x.high <- setx(cshare_03, cshare=quantile(subset_2003$cshare,0.75))
s.out <- sim(cshare_03, x.low, x1=x.high)
rcw_fd_cshare_data <- s.out$qi$fd
rcw_fd_cshare <- summary(s.out)$qi.stats$fd

#rcw_fd_cshare_data



##### First Differences using dead
#Do OLS analysis:
subset_2003 <- subset(RCW, year==2003)

dead_03 <- lm(ck_dpop ~ dead + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003)
 
#Add clustered standard errors
dead_03_cl <- cl(subset_2003, dead_03, subset_2003$sub_district)

#Update original lm object
dead_03$se <- dead_03_cl[,2]

#Use zelig to get fds
set.seed(84109)
x.low <- setx(dead_03, dead=0)
x.high <- setx(dead_03, dead=1)
s.out <- sim(dead_03, x.low, x1=x.high)
rcw_fd_dead_data <- s.out$qi$fd
rcw_fd_dead <- summary(s.out)$qi.stats$fd


##### First Differences with RCW pct_dead

#Do OLS analysis
subset_2003 <- subset(RCW, year==2003)

pct_dead_03 <- lm(ck_dpop ~ pct_dead + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_NaganRaya + ck_kab_BaratDaya + ck_kab_Selatan + ck_kab_Utara + ck_kab_Timur +  ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh +  pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003)
 
##Add clustered standard errors
pct_dead_03_cl <- cl(subset_2003, pct_dead_03, subset_2003$sub_district)

#Update original lm object
pct_dead_03$se <- pct_dead_03_cl[,2]

#Use zelig for FDs N.B. We use movement from 25th to 75th percentile because continuous
set.seed(84109)
x.low <- setx(pct_dead_03, pct_dead=quantile(subset_2003$pct_dead,0.25))
x.high <- setx(pct_dead_03, pct_dead=quantile(subset_2003$pct_dead,0.75))
s.out <- sim(pct_dead_03, x.low, x1=x.high)
rcw_fd_pct_dead_data <- s.out$qi$fd
rcw_fd_pct_dead <- summary(s.out)$qi.stats$fd

#rcw_fd_pct_dead_data


####### Make Figure 1 #######

plot(density(rcw_fd_c_data), xlim=c(-1,1), ylim=c(0,5), col="red", lwd=2, main="First Differences in village population using different measures of conflict (RCW)")
lines(density(rcw_fd_cc_data), col="blue", lwd=2)
lines(density(rcw_fd_cshare_data), col="orange", lwd=2)
lines(density(rcw_fd_dead_data), col="green", lwd=2)
lines(density(rcw_fd_pct_dead_data), col="purple", lwd=2)
legend("topright", inset=.05, title="Measures of Conflict", c("conflict","conflict cluster","conflict share","dead", "percent dead"), fill=c("red", "blue","orange", "green", "purple"))




########################## RCW Matched 2000/2003 FDs -- Figure 2 ################

#### First Differences with conflict
set.seed(84109)
x.low <- setx(rcw_mat_c_conflict_03, c_conflict=0)
x.high <- setx(rcw_mat_c_conflict_03, c_conflict=1)
s.out <- sim(rcw_mat_c_conflict_03, x.low, x1=x.high)
rcw_mat_fd_c_data <- s.out$qi$fd
rcw_mat_fd_c <- summary(s.out)$qi.stats$fd

##### First Differences using conflict cluster
set.seed(84109)
x.low <- setx(rcw_mat_high_cshare_03, high_cshare=0)
x.high <- setx(rcw_mat_high_cshare_03, high_cshare=1)
s.out <- sim(rcw_mat_high_cshare_03, x.low, x1=x.high)
rcw_mat_fd_cc_data <- s.out$qi$fd
rcw_mat_fd_cc <- summary(s.out)$qi.stats$fd

##### First Differences using dead

##Do Matching using dead
subset_2003 <- subset(RCW, year==2003)

#25th, 50th, 75th quantile as cutpoints
popcuts <- quantile(subset_2003$pop_lag,c(0, 0.25,0.5, 0.75, 1))
couplescuts <- quantile(subset_2003$ck_couples00, c(0, 0.25, 0.5, 0.75, 1))
poorcuts <- quantile(subset_2003$povrate0005, c(0, 0.25, 0.5, 0.75, 1))

cem_out <- matchit(dead ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308 + yr2008, data=subset_2003, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

#Run OLS on matched data set
rcw_mat_dead_03 <-  lm(ck_dpop ~ dead + urban0308 + ck_altitude + ck_remote + povrate0005 + ck_couples00 + ck_station03 + c_police0005 +  ck_kab_Tengah + ck_kab_Barat + ck_kab_BaratDaya + ck_kab_Selatan +  ck_kab_Utara + ck_kab_Timur + ck_kab_Tamiang + ck_kab_Langsa + ck_kab_Lhokseumawe + ck_kab_BandaAceh + ck_kab_NaganRaya + pop_lag + pop_lag_sq + pop_lag_cu + pop_lag_qu , data=subset_2003, weights=cem_out$w)

#Add clustered standard errors
rcw_mat_dead_03_cl <- cl(subset_2003, rcw_mat_dead_03, subset_2003$sub_district)

#update original lm object
rcw_mat_dead_03$se <-  rcw_mat_dead_03_cl[,2]

#Use zelig to get FDs
set.seed(84109)
x.low <- setx(rcw_mat_dead_03, dead=0)
x.high <- setx(rcw_mat_dead_03, dead=1)
s.out <- sim(rcw_mat_dead_03, x.low, x1=x.high)
rcw_mat_fd_dead_data <- s.out$qi$fd
rcw_mat_fd_dead <- summary(s.out)$qi.stats$fd




####### Make Figure 2 #######

plot(density(rcw_mat_fd_c_data), xlim=c(-1,1), ylim=c(0,3), col="red", lwd=2, main="First Differences in village population using different measures of conflict (RCW Matched)")
lines(density(rcw_mat_fd_cc_data), col="blue", lwd=2)
lines(density(rcw_mat_fd_dead_data), col="green", lwd=2)
legend("topright", inset=.05, title="Measures of Conflict", c("conflict","conflict cluster","dead"), fill=c("red", "blue", "green"))


###################################################################################
################## ROBUSTNESS CHECKS -- Love Plots ################################
###################################################################################

########################## Love Plot RCW conflict -- Figure 3 ################

cem_out <- matchit(c_conflict ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308, data=subset_2003, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

pre_balance <-summary(cem_out)$sum.all
post_balance <-summary(cem_out)$sum.matched
bal_lab <- rownames(pre_balance)

plot(x = (pre_balance[,1] - pre_balance[,2])/pre_balance[,3],
y = 1:nrow(pre_balance), pch = 19, col = "burlywood2",
xlab = "Standardized Imbalance", axes = FALSE,
main="Balance improvement due to matching -- RCW with independent variable of conflict",
xlim = c(-0.3,0.5), ylab = "")
points(x = (post_balance[,1] - post_balance[,2])/post_balance[,3],
y = 1:nrow(post_balance), pch = 19, col = "cadetblue2")
abline(v = 0, lty = "dashed")
axis(1)
axis(2, labels = c("Distance","Population", "Couples", "Poverty", "Altitude", "Urban"), las=1, at = 6:1, cex.axis = 0.65)
legend("topright", legend = c("Pre-match","Post-match"),
pch = c(19,19),
col = c("burlywood2","cadetblue2"), cex = .8)

########################## Love Plot RCW conflict cluster -- Figure 4 ################

cem_out <- matchit(high_cshare ~ pop_lag + ck_couples00 + povrate0005 + ck_altitude + urban0308, data=subset_2003, method="cem", cutpoints = list(pop_lag = popcuts,  ck_couples00 = couplescuts, povrate0005 = poorcuts))

pre_balance <-summary(cem_out)$sum.all
post_balance <-summary(cem_out)$sum.matched
bal_lab <- rownames(pre_balance)

plot(x = (pre_balance[,1] - pre_balance[,2])/pre_balance[,3],
y = 1:nrow(pre_balance), pch = 19, col = "burlywood2",
xlab = "Standardized Imbalance", axes = FALSE,
main="Balance improvement due to matching -- RCW with independent variable of conflict cluster",
xlim = c(-0.3,0.5), ylab = "")
points(x = (post_balance[,1] - post_balance[,2])/post_balance[,3],
y = 1:nrow(post_balance), pch = 19, col = "cadetblue2")
abline(v = 0, lty = "dashed")
axis(1)
axis(2, labels = c("Distance","Population", "Couples", "Poverty", "Altitude", "Urban"), las=1, at = 6:1, cex.axis = 0.65)
legend("topright", legend = c("Pre-match","Post-match"),
pch = c(19,19),
col = c("burlywood2","cadetblue2"), cex = .8)

###################################################################################
################## ROBUSTNESS CHECKS -- Residual Plots ############################
###################################################################################


#RCW residuals with conflict as independent variable
plot(curve(dnorm, from=-15, to=15, type="l"), type="l", col="red", xlim=c(-15, 15), main="Distribution of residuals (RCW) conflict as independent variable", xlab="Distribution of residuals from linear regression", ylab="Density")
lines(density(resid(c_conflict_03)), lwd=2, col="blue", lty=2) 
lines(density(resid(rcw_mat_c_conflict_03)), lwd=2, col="green", lty=2) 
legend("topright", legend=c("Standard Normal", "Density of Residual Distribution (before matching)", "Density of Residual Distribution (after matching)"), pch = c(19,19), col=c("red", "blue", "green"), cex=0.6) 
 

#RCW residuals with conflict cluster as independent variable
plot(curve(dnorm, from=-15, to=15, type="l"), type="l", col="red", xlim=c(-15, 15), main="Distribution of residuals (RCW) conflict cluster as independent variable", xlab="Distribution of residuals from linear regression", ylab="Density")
lines(density(resid(high_cshare_03)), lwd=2, col="blue", lty=3) 
lines(density(resid(rcw_mat_high_cshare_03)), lwd=2, col="green", lyt=3) 
legend("topright", legend=c("Standard Normal", "Density of Residual Distribution (before matching)", "Density of Residual Distribution (after matching)"), pch = c(19,19), col=c("red", "blue", "green"), cex=0.6)  


###Looking at an individual subdistricts Unmatched
resids <- resid(c_conflict_03)
sub_districts <- subset_2003$sub_district
resid_by_subd <-cbind(resids, sub_districts)

#There are 198 subdistricts
#Randomly choose 10 and plot the distribution of their residuals

set.seed(84109) 
subds <- sample(1:198, 10)

##Look at sub_district 144
resid1 <- subset(resid_by_subd, sub_districts==subds[1])[,1]
resid2 <- subset(resid_by_subd, sub_districts==subds[2])[,1]
resid3 <- subset(resid_by_subd, sub_districts==subds[3])[,1]
resid4 <- subset(resid_by_subd, sub_districts==subds[4])[,1]
resid5 <- subset(resid_by_subd, sub_districts==subds[5])[,1]
resid6 <- subset(resid_by_subd, sub_districts==subds[6])[,1]
resid7 <- subset(resid_by_subd, sub_districts==subds[7])[,1]
resid8 <- subset(resid_by_subd, sub_districts==subds[8])[,1]
resid9 <- subset(resid_by_subd, sub_districts==subds[9])[,1]
resid10 <- subset(resid_by_subd, sub_districts==subds[10])[,1]


plot(curve(dnorm, from=-15, to=15, type="l"), type="l", col=2, xlim=c(-8, 8),ylim=c(0,0.8), main="Distribution of residuals by sub district (RCW) conflict as independent variable", xlab="Distribution of residuals from linear regression", ylab="Density", lwd=3)
lines(density(resid1), lwd=2, col=3, lty=2) 
lines(density(resid2), lwd=2, col=4, lty=2) 
lines(density(resid3), lwd=2, col=5, lty=2) 
lines(density(resid4), lwd=2, col=6, lty=2) 
lines(density(resid5), lwd=2, col=7, lty=2) 
lines(density(resid6), lwd=2, col=8, lty=2) 
lines(density(resid7), lwd=2, col=9, lty=2) 
legend("topright", legend=c("Standard Normal", paste("Sub District", subds[1]), paste("Sub District", subds[2]), paste("Sub District", subds[3]), paste("Sub District", subds[4]), paste("Sub District", subds[5]), paste("Sub District", subds[6]), paste("Sub District", subds[7])), pch = c(19,19), col=c(2:10), cex=0.8) 


###Looking at an individual subdistricts matched
resids <- resid(rcw_mat_c_conflict_03)
sub_districts <- subset_2003$sub_district
resid_by_subd <-cbind(resids, sub_districts)

#There are 198 subdistricts
#Randomly choose 10 and plot the distribution of their residuals

set.seed(84109) 
subds <- sample(1:198, 10)

##Look at sub_district 144
resid1 <- subset(resid_by_subd, sub_districts==subds[1])[,1]
resid2 <- subset(resid_by_subd, sub_districts==subds[2])[,1]
resid3 <- subset(resid_by_subd, sub_districts==subds[3])[,1]
resid4 <- subset(resid_by_subd, sub_districts==subds[4])[,1]
resid5 <- subset(resid_by_subd, sub_districts==subds[5])[,1]
resid6 <- subset(resid_by_subd, sub_districts==subds[6])[,1]
resid7 <- subset(resid_by_subd, sub_districts==subds[7])[,1]
resid8 <- subset(resid_by_subd, sub_districts==subds[8])[,1]
resid9 <- subset(resid_by_subd, sub_districts==subds[9])[,1]
resid10 <- subset(resid_by_subd, sub_districts==subds[10])[,1]


plot(curve(dnorm, from=-15, to=15, type="l"), type="l", col=2, xlim=c(-8, 8),ylim=c(0,0.8), main="Distribution of residuals by sub district (RCW Matched) conflict as independent variable", xlab="Distribution of residuals from linear regression", ylab="Density", lwd=3)
lines(density(resid1), lwd=2, col=3, lty=2) 
lines(density(resid2), lwd=2, col=4, lty=2) 
lines(density(resid3), lwd=2, col=5, lty=2) 
lines(density(resid4), lwd=2, col=6, lty=2) 
lines(density(resid5), lwd=2, col=7, lty=2) 
lines(density(resid6), lwd=2, col=8, lty=2) 
lines(density(resid7), lwd=2, col=9, lty=2) 
legend("topright", legend=c("Standard Normal", paste("Sub District", subds[1]), paste("Sub District", subds[2]), paste("Sub District", subds[3]), paste("Sub District", subds[4]), paste("Sub District", subds[5]), paste("Sub District", subds[6]), paste("Sub District", subds[7])), pch = c(19,19), col=c(2:10), cex=0.8) 
 
 



